package top.maof.book.spider.develop.contents.shuhuangge;

import lombok.Data;
import top.maof.book.model.Book;
import top.maof.book.model.Contents;
import top.maof.book.spider.develop.Split;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.model.AfterExtractor;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Page model for a book detail page of the Shuhuangge site (www.shuhuangge.org).
 * <p>
 * Fields annotated with {@link ExtractBy} are filled from the page by XPath; the remaining
 * fields are derived in {@link #afterProcess(Page)}.
 * <p>
 * Deprecated: per the original author's note, the site has anti-hotlinking protection
 * and cannot be crawled.
 */
@Deprecated
@Data
@TargetUrl("https://www.shuhuangge.org/\\d+_\\d+")
@HelpUrl({"https://www.shuhuangge.org/wanben/[\\w\\?=/&.]*", "https://www.shuhuangge.org/xuanhuanxiaoshuo/[\\w\\?=/&.]*",
        "https://www.shuhuangge.org/xiuzhenxiaoshuo/[\\w\\?=/&.]*", "https://www.shuhuangge.org/dushixiaoshuo/[\\w\\?=/&.]*",
        "https://www.shuhuangge.org/chuanyuexiaoshuo/[\\w\\?=/&.]*",
        "https://www.shuhuangge.org/wangyouxiaoshuo/[\\w\\?=/&.]*", "https://www.shuhuangge.org/kehuanxiaoshuo/[\\w\\?=/&.]*",
        "https://www.shuhuangge.org/paihangbang/[\\w\\?=/&.]*",
        "https://www.shuhuangge.org/xiaoshuodaquan/[\\w\\?=/&.]*"})
public class Shuhuangge implements AfterExtractor, Split {
    /** Scheme and host prepended to site-absolute image paths. */
    private static final String SITE_ROOT = "https://www.shuhuangge.org";

    /** Matches a {@code yyyy-MM-dd} date anywhere inside the "last updated" text. */
    private static final Pattern UPDATE_DATE_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}");

    @ExtractBy(value = "//div[@id=maininfo]/div[@id=info]/h1/text()")
    private String name;

    @ExtractBy(value = "//div[@id=maininfo]/div[@id=info]/p[1]/a/text()")
    private String author;

    @ExtractBy(value = "//div[@id=fmimg]/img/@src")
    private String img;

    @ExtractBy(value = "//div[@class=con_top]/a[2]/text()")
    private String classify; // category label as shown on the site, e.g. science fiction

    /** Date of the latest chapter update; stays {@code null} when it cannot be read from the page. */
    private Date updateDate;

    private Date insertDate = new Date();

    @ExtractBy(value = "//div[@id=intro]/p[1]/text()")
    private String synopsis;

    /** Never assigned in this class (the code that derived it is disabled). */
    private String state;

    private String url;

    /** Never assigned in this class (the code that captured the chapter list is disabled). */
    private String content;

    // NOTE(review): presumably the identifier of this source site in the project's data model - confirm.
    private Integer webId = 4;

    // NOTE(review): presumably the page encoding of the site; not referenced inside this class.
    public static final String CHARSET = "gbk";

    /**
     * Derives the fields that cannot be extracted by annotation alone: the last-update date,
     * an absolute cover-image URL and the URL of the crawled page.
     *
     * @param page the page that was just extracted into this object
     */
    @Override
    public void afterProcess(Page page) {
        // Expected text looks like "最后更新：2019-04-17 02:11:06"; the node may be absent.
        String updateText = page.getHtml().xpath("//div[@id=info]/p[3]/text()").toString();
        this.updateDate = parseUpdateDate(updateText);
        // The former "older than a month => finished, otherwise serialising" classification was
        // disabled in the original code, so state and content are intentionally left unset here.

        // Normalise the cover image to an absolute URL,
        // e.g. https://www.shuhuangge.org/files/article/image/9/9716/9716s.jpg
        if (this.img != null && !this.img.startsWith("http")) {
            if (this.img.startsWith("//")) {
                // protocol-relative address: only the scheme is missing
                this.img = "https:" + this.img;
            } else if (this.img.startsWith("/")) {
                // site-absolute path
                this.img = SITE_ROOT + this.img;
            } else {
                // relative path: resolve against the directory of the current page URL
                String pageUrl = page.getUrl().toString();
                this.img = pageUrl.substring(0, pageUrl.lastIndexOf("/") + 1) + this.img;
            }
        }

        // remember the URL of this page
        this.setUrl(page.getRequest().getUrl());
    }

    /**
     * Finds the first {@code yyyy-MM-dd} date in the given text and parses it.
     *
     * @param text raw "last updated" text, may be {@code null}
     * @return the parsed date, or {@code null} when the text is missing, contains no date,
     *         or the date is not a valid calendar date
     */
    private static Date parseUpdateDate(String text) {
        if (text == null) {
            return null;
        }
        Matcher matcher = UPDATE_DATE_PATTERN.matcher(text);
        if (!matcher.find()) {
            return null;
        }
        // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
        format.setLenient(false); // reject impossible dates such as 2019-13-45
        try {
            return format.parse(matcher.group());
        } catch (ParseException e) {
            // best effort: report the problem and continue without an update date
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Copies author, state, image, name, insert date, category, synopsis and update date
     * into a new {@link Book}.
     *
     * @return a newly created book populated from this object
     */
    @Override
    public Book getBook() {
        Book book = new Book();
        book.setAuthor(this.author);
        book.setState(this.state);
        book.setImg(this.img);
        book.setName(this.name);
        book.setInsertDate(this.insertDate);
        book.setClassify(this.classify);
        book.setSynopsis(this.synopsis);
        book.setUpdateDate(this.updateDate);
        return book;
    }

    /**
     * Copies URL, content, web id and update date into a new {@link Contents}.
     *
     * @return a newly created contents object populated from this object
     */
    @Override
    public Contents getContents() {
        Contents contents = new Contents();
        contents.setUrl(this.url);
        contents.setContent(this.content);
        contents.setWebId(this.webId);
        contents.setUpdateDate(this.updateDate);
        return contents;
    }
}